1 Install Packages

2 Load Packages

3 Load the data

The output ko_abundance.tsv file was open with Excel to create an Excel file (nf_totalabundance.xlsx) with all three necessary data groups to work with Phyloseq objects: - RPKM_df contains the abundance of each KO in the sample calculated in RPKM. This information is in the ‘RPKM’ sheet of the ‘nf_totalabundance.xlsx’ file. - KO_df contains the KO ids together with its description and symbol. This information is in the ‘KO’ sheet of the ‘nf_totalabundance.xlsx’ file. - samples_df contains the metadata of each sample. This information is in the ‘metadata’ sheet of the ‘nf_totalabundance.xlsx’ file.

#Loading the data
#setwd("~/Desktop/RyC_TFM/final_results")
#setwd("~/Downloads")

RPKM_df <- read_excel("nf_totalabundance.xlsx",  sheet = "RPKM")

KO_df  <- read_excel("nf_totalabundance.xlsx", sheet = "NF")

samples_df  <- read_excel("nf_totalabundance.xlsx", sheet = "metadata")

# We want to add a new samples variable to merge the subjects' Group (Donor, Fecal Microbial Transplant (FMT) or Placebo) with the data collecting Weeks variables
samples_df$group_week <- paste (samples_df$Group, samples_df$week, sep = "_")

4 Creating phyloseq object

A Phyloseq object is created with the data stored in the previously loaded dataframes that are firstly transformed to matrices when needed. Additionally, abundances are normalized using the Median Scaling method. We will obtain the ‘raw_ps’ phyloseq object with the raw read count data.

# Define the row names from the Novel_Fam_ko column in the RPKM_df
RPKM_df <- RPKM_df %>%
  tibble::column_to_rownames("Novel_Fam_ko")

RPKM_df[is.na(RPKM_df)] <- 0

# Idem for the other dataframes
KO_df <- KO_df %>% 
  tibble::column_to_rownames("Novel_Fam_ko")

samples_df <- samples_df %>% 
  tibble::column_to_rownames("Sample_ID")

#Transform into matrices RPKM and KO tables (sample table can be left as data frame)
RPKM_mat <- as.matrix(RPKM_df)

KO_mat <- as.matrix(KO_df)

class(RPKM_mat) <- "numeric" # Because it contains the RPKM calculated for each KO. We are going to consider this values as normalized read counts for each KO

# Transform to Phyloseq objects. 
# Since PhyloSeq is usually implemented for taxonomy and here we are working with functional annotations, some equivalencies are made:
RPKM = otu_table(RPKM_mat, taxa_are_rows = TRUE) 
KO = tax_table(KO_mat)
samples = sample_data(samples_df)

# We want to have the raw phyloseq object which contains normalized read counts (RPKM) as integer numbers 
raw_ps <- phyloseq(RPKM, KO, samples)
round_fun <- function(x) round(x)
raw_ps <- transform_sample_counts(raw_ps, round_fun) 

4.0.1 Overview of the created Phyloseq Object

Since PhyloSeq is usually implemented for taxonomy and here we are working with functional annotations, some equivalencies are made:

Taxa in this case is refering to the KOs identifiers. Each KO identifier has a Description, Symbol and again the KO (3 taxonomic ranks) which are stored in the Taxonomy table. The OTU Table corresponds then to the relative abundance of each KO (taxa) in RPKM units for each of the 117 samples that have 15 variables stored in the Sample Data section.

raw_ps # Phyloseq Object schema
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20563 taxa and 117 samples ]
## sample_data() Sample Data:       [ 117 samples by 15 sample variables ]
## tax_table()   Taxonomy Table:    [ 20563 taxa by 1 taxonomic ranks ]
ntaxa(raw_ps) # Number of KO
## [1] 20563
nsamples(raw_ps) # Number of samples
## [1] 117
sample_names(raw_ps) # All sample names
##   [1] "R12W8"     "R2W0"      "R16W24"    "543-46G1"  "R30W7"     "R2W1"     
##   [7] "R16W0"     "R8W7"      "R14W4"     "R14W7"     "R11W24"    "R30W4"    
##  [13] "R20W24"    "R24W1"     "R20W8"     "R14W0"     "R18W24"    "R4W8"     
##  [19] "R2W24X"    "R12W24"    "R24W7"     "R14W1"     "R23W24"    "R29W24"   
##  [25] "R2W24Y"    "R2W4"      "R2W7"      "0505-0101" "R30W1"     "R30W0"    
##  [31] "R16W7"     "R24W4"     "R24W24"    "R8W0"      "R26W0"     "R18W8"    
##  [37] "R10W7"     "R20W0"     "R14W24"    "R4W7"      "R18W4"     "R24W8"    
##  [43] "R20W1"     "R6W0"      "R12W1"     "R18W7"     "R4W4"      "R2W8"     
##  [49] "R12W0"     "R19W24"    "R21W24"    "R6W7"      "R30W8"     "R18W0"    
##  [55] "R20W4"     "R12W7"     "R22W0"     "R10W24"    "R18W1"     "R10W0"    
##  [61] "R4W1"      "R12W4"     "R4W0"      "R14W8"     "R17W24"    "R20W6"    
##  [67] "R25W0"     "R8W24"     "R1W7"      "R17W1"     "R3W0"      "R9W7"     
##  [73] "R1W5"      "R29W8"     "R17W0"     "R7W8"      "R30W24"    "R23W8"    
##  [79] "R3W7"      "R9W0"      "R6W24"     "R27W0"     "R17W7"     "R17W4"    
##  [85] "585-57G1"  "R1W1"      "R1W24"     "R11W8"     "R1W0"      "R19W7"    
##  [91] "R7W0"      "R17W8"     "R29W0"     "R23W7"     "R11W4"     "R29W1"    
##  [97] "R13W0"     "R7W1"      "R21W0"     "R7W24"     "R23W4"     "R11W7"    
## [103] "R29W5"     "R21W7"     "R4W24"     "R1W8"      "R11W0"     "R7W4"     
## [109] "R5W0"      "R11W1"     "R19W0"     "R3W24"     "R23W1"     "R9W24"    
## [115] "R7W7"      "R29W7"     "R23W0"
rank_names(raw_ps) # KO's characteristics
## [1] "KO"
sample_variables(raw_ps) # Sample variables
##  [1] "week"                       "id"                        
##  [3] "Group"                      "Donor"                     
##  [5] "Age"                        "Sex"                       
##  [7] "AIDS_events"                "Nadir.CD4.T.cell"          
##  [9] "CD4..T.cells"               "CD4.CD8.ratio"             
## [11] "Antibiotics_.past_6_months" "Antibiotics_past_3_months" 
## [13] "Completed.Follow.up"        "Sample_id"                 
## [15] "group_week"
otu_table(raw_ps)[1:5, 1:5] # Overview of the relative abundance table of each KO in RPKM
## OTU Table:          [5 taxa and 5 samples]
##                      taxa are rows
##          R12W8 R2W0 R16W24 543-46G1 R30W7
## NOVX6ZVY    30    0      0        0     0
## NOVKMEC2    30    0      0        0     0
## NOVDNGQ3    95    0      0        0     0
## NOVP75FI    32    0      0        0     0
## NOV86T3H   171    0      0        0     0
tax_table(raw_ps)[1:5] # Overview of the the characteristics of each KO
## Taxonomy Table:     [5 taxa by 1 taxonomic ranks]:
##          KO        
## NOVX6ZVY "NOVX6ZVY"
## NOVKMEC2 "NOVKMEC2"
## NOVDNGQ3 "NOVDNGQ3"
## NOVP75FI "NOVP75FI"
## NOV86T3H "NOV86T3H"

5 Filtering and Normalizing

5.1 Filtering Data

In this step we are selecting the samples we want to work with and creating different subset raw phyloseq objects.

incomplete <- c("R24", "R13", "R22", "R25", "R26", "R27", "R5") # List of id patients with incomplete data

# Here we are selecting data collected at week 0 and 7 from both FMT, placebo and Donor subjects except from subjects with incomplete data
ps_raw_subset <- subset_samples(raw_ps, (week == "Week_0" |  week == "Week_7") & !(id %in% incomplete))

# Here we are selecting data collected at week 0 and 7 from subjects that received the FMT from the already subset phyloseq object
ps_raw_FMT <- subset_samples(ps_raw_subset, Group=="FMT")

# Here we are selecting data collected at week 0 and 7 from Placebo subjects from the already subset phyloseq object
ps_raw_placebo <- subset_samples(ps_raw_subset, Group=="Placebo")

# Here we are selecting the samples collected at each week
ps_raw_week0 <- subset_samples(ps_raw_subset, (Group == "Placebo" |  Group == "FMT") & week == "Week_0") 
ps_raw_week7 <- subset_samples(ps_raw_subset, (Group == "Placebo" |  Group == "FMT") & week == "Week_7") 

5.2 DESeq2 Normalization

The DESeq2 normalization method is a statistical approach used to adjust for systematic biases and differences in sequencing depth when analyzing count data. It involves estimating size factors, which account for differences in library size between samples, and dispersion, which capture the variability in read counts. The method applies a variance-stabilizing transformation to the counts, making them more suitable for downstream statistical analysis. This normalization process allows for valid comparisons between samples and enables the identification of statistically significant differences in expression levels or abundances while controlling for technical variations.

set.seed(0503)

# First we need to convert the raw phyloseq object to a DESeqDataSet object 
ps_dds <- phyloseq_to_deseq2(ps_raw_subset, ~Group + week )

gm_mean = function(x, na.rm=TRUE){  exp(sum(log(x[x > 0]), na.rm=na.rm) / length(x))} # geometric mean
geoMeans = apply(counts(ps_dds), 1, gm_mean)
ps_dds = estimateSizeFactors(ps_dds, geoMeans = geoMeans) # estimate size factors. Size factors account for differences in sequencing depth between samples and are used to normalize the read counts

abund<-as.matrix(counts(ps_dds, normalized=TRUE)) # variance-stabilized transformed data. This transformation stabilizes the variance and makes the data more suitable for statistical analysis.

# # making our phyloseq object with transformed table
vst_count_phy <- otu_table(abund, taxa_are_rows=T)
sample_info_tab_phy <- sample_data(samples_df)
vst_physeq <- phyloseq(vst_count_phy, sample_info_tab_phy)

6 First glance at results

Additionally, other subsets with the 50 most abundant KO among all samples and the KOs involved in the Butyrate production pathway are created.

# Get the top50 most abundant KOs out of the filtered phyloseq object
top50 <- names(sort(taxa_sums(ps_raw_subset), decreasing=TRUE)[1:50])

# Create a phyloseq object only with the top50 KOs
ps_top50 = prune_taxa(top50, ps_raw_subset)

This subsets are visually explored in bar plots.

6.1 Most abudant KOs

plot <- plot_bar(ps_top50, fill = "KO") + 
  geom_bar(aes(color=KO, fill=KO), stat="identity", position="fill") + 
  labs(x="Samples", y="Abundance", title="The 50 most abundant KO") +
  facet_wrap(~week, scales="free_x") +
  theme(axis.text.x = element_text(size = 15),
        axis.text.y = element_text(size = 20),
        axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20),
        legend.title = element_text(size = 20), 
        legend.text = element_text(size = 15),
        strip.text = element_text(size = 20),
        plot.title = element_text(size = 30))
plot 

ps_top50_relab = phyloseq::transform_sample_counts(ps_top50, function(x){(x / sum(x))})
plot <- plot_bar(ps_top50_relab, fill = "KO") + 
  geom_bar(aes(color=KO, fill=KO), stat="identity", position="fill") + 
  labs(x="Samples", y="Relative abundance between Top50", title="The 50 most abundant KO") + 
  facet_wrap(~week, scales="free_x") +
  theme(axis.text.x = element_text(size = 15),
        axis.text.y = element_text(size = 20),
        axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20),
        legend.title = element_text(size = 20), 
        legend.text = element_text(size = 15),
        strip.text = element_text(size = 20),
        plot.title = element_text(size = 30))
plot

7 Alpha Diversity

Next, we are going to calculate the Alpha Diversity of our samples. Alpha-diversity is often described as within-sample diversity. In this case, this measure will provide information about the functional richness of the sample. Here we are going to measure the alpha diversity with the observed richness and the Shannon index. The observed diversity evaluates how many different functions there are while the Shannon index also considers how evenly they are distributed.

To do so, we first need to calculate the diversity estimates from our phyloseq object, and test if the data are normally distributed using Shapiro-Wilk Normality test. If the data follows a normal distribution, we can run an ANOVA or t-test to find statistically significant difference between sample-types. If the data is not normally distributed, a non-parametric test such as Kruskal-Wallis will be considered.

7.0.1 Test Normality of the Shannon diversity estimations:

First, we need to extract the estimates of the alpha diversity and then perform a Shapiro-Wilk Normality test to see if this data follows a normal distribution. The null hypothesis of this test is that “sample distribution is normal”. If the test is significant, the distribution is non-normal.

# Extract Shannon results
alpha_results <- estimate_richness(ps_raw_subset, measures = c("Shannon", "Observed"))

# Shapiro-Wilk Normality test.
# The null hypothesis of this test is that “sample distribution is normal”. If the test is significant, the distribution is non-normal. 
shapiro.test(alpha_results$Shannon)
## 
##  Shapiro-Wilk normality test
## 
## data:  alpha_results$Shannon
## W = 0.95174, p-value = 0.05912
shapiro.test(alpha_results$Observed)
## 
##  Shapiro-Wilk normality test
## 
## data:  alpha_results$Observed
## W = 0.91559, p-value = 0.003007

From the output, the p-value > 0.05 implying that the distribution of the data are not significantly different from normal distribution. In other words, we can assume the normality for both Shannon alpha diversity estimates

symnum.args = list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1), symbols = c("****", "***", "**", "*", "ns"))


# Plot all functional alpha diversity estimates colored by week
plot_richness(ps_raw_subset, x="Sample_id", color="week", measures=c("Shannon", "Observed")) + 
  labs(title="Alpha Diversity of each Sample", x="Sample IDs") + 
  theme(axis.text.x = element_text(size = 12),
        axis.text.y = element_text(size = 20),
        axis.title.x = element_text(size = 25),
        axis.title.y = element_text(size = 25),
        legend.title = element_text(size = 25), 
        legend.text = element_text(size = 20),
        strip.text = element_text(size = 25),
        plot.title = element_text(size = 30))

a_my_comparisons <- list(c("FMT_Week_0", "FMT_Week_7"), c("Placebo_Week_0", "Placebo_Week_7"),  c("FMT_Week_0", "Placebo_Week_0"), c("FMT_Week_7", "Placebo_Week_7")) 
plot_richness(ps_raw_subset, x="group_week", color="group_week", nrow = 2, measures=c("Shannon", "Observed")) + 
  geom_boxplot(aes(fill=group_week),alpha=0.2) + 
  labs(title="Alpha Diversity") +
  stat_compare_means(method = "t.test", comparisons = a_my_comparisons, label = "p.signif", symnum.args = symnum.args, size=8) + 
  theme(axis.text.x = element_text(size = 20),
        axis.text.y = element_text(size = 20),
        axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20),
        legend.title = element_text(size = 20), 
        legend.text = element_text(size = 15),
        strip.text = element_text(size = 20),
        plot.title = element_text(size = 30))

7.0.2 Plot Shannon and Observed diversity and test differences between groups.

Given the normal distribution of the data, we can plot the Alpha Diversity of each sample group and test its significance with t-tests.

b_my_comparisons <- list(c("FMT", "Placebo"), c("Donor", "FMT"), c("Placebo", "Donor"))
# Boxplot Shannon functional diversity grouped by Group and separated by Week
plot_richness(ps_raw_subset, x="Group", color="Group", measures=c("Shannon")) + 
  geom_boxplot(aes(fill=Group),alpha=0.2) + 
  labs(title="Shannon Index grouped by Subjects'") +
  stat_compare_means(method = "t.test", comparisons = b_my_comparisons, label = "p.signif", symnum.args = symnum.args, size=8) + 
  facet_wrap(~week, scales="free_x") + 
  theme(axis.text.x = element_text(size = 20),
        axis.text.y = element_text(size = 20),
        axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20),
        legend.title = element_text(size = 20), 
        legend.text = element_text(size = 15),
        strip.text = element_text(size = 20),
        plot.title = element_text(size = 30))

# Boxplot Observed functional diversity grouped by Group and separated by Week
plot_richness(ps_raw_subset, x="Group", color="Group", measures=c("Observed")) + 
  facet_wrap(~week, scales="free_x") + 
  geom_boxplot(aes(fill=Group),alpha=0.2) + 
  labs(title="Observed diversity grouped by Subjects'") +
  stat_compare_means(method = "t.test", comparisons = b_my_comparisons, label = "p.signif", symnum.args = symnum.args, size=8) + 
  theme(axis.text.x = element_text(size = 20),
        axis.text.y = element_text(size = 20),
        axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20),
        legend.title = element_text(size = 20), 
        legend.text = element_text(size = 15),
        strip.text = element_text(size = 20),
        plot.title = element_text(size = 30))

c_my_comparisons <- list( c("Week_0", "Week_7")) 
## Plot all Shannon functional alpha diversity grouped by week
plot_richness(ps_raw_subset, color = "week", x = "week", measures = c("Shannon")) + 
  geom_boxplot(aes(fill = week), alpha=0.2) +
  stat_compare_means(method = "t.test", comparisons = c_my_comparisons, label = "p.signif", symnum.args = symnum.args, size=8) +
  labs(title="Shannon Index grouped by Week") + 
  theme(axis.text.x = element_text(size = 20),
        axis.text.y = element_text(size = 20),
        axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20),
        legend.title = element_text(size = 20), 
        legend.text = element_text(size = 15),
        strip.text = element_text(size = 20),
        plot.title = element_text(size = 30))

## Plot FMT functional alpha diversity grouped by week

alpha_results_FMT <- estimate_richness(ps_raw_FMT, measures = c("Shannon", "Observed"))

plot_richness(ps_raw_FMT, color = "week", x = "week", measures = c("Shannon")) + 
  geom_boxplot(aes(fill = week), alpha=0.2) +
  stat_compare_means(method = "t.test", comparisons = c_my_comparisons, label = "p.signif", symnum.args = symnum.args, size=8, paired=TRUE) +
  labs(title="Shannon Index of FMT subjects grouped by Week") + 
  geom_line(aes(x = week, y = alpha_results_FMT$Shannon, group = id), color='black', alpha=0.1) +
  theme(axis.text.x = element_text(size = 20),
        axis.text.y = element_text(size = 20),
        axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20),
        legend.title = element_text(size = 20), 
        legend.text = element_text(size = 15),
        strip.text = element_text(size = 20),
        plot.title = element_text(size = 30))

plot_richness(ps_raw_FMT, color = "week", x = "week", measures = c("Observed")) + 
  geom_boxplot(aes(fill = week), alpha=0.2) +
  stat_compare_means(method = "t.test", comparisons = c_my_comparisons, label = "p.signif", symnum.args = symnum.args, size=8, paired=TRUE) +
  labs(title="Observed Diversity of FMT subjects grouped by Week") + 
  geom_line(aes(x = week, y = alpha_results_FMT$Observed, group = id), color='black', alpha=0.1) +
  theme(axis.text.x = element_text(size = 20),
        axis.text.y = element_text(size = 20),
        axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20),
        legend.title = element_text(size = 20), 
        legend.text = element_text(size = 15),
        strip.text = element_text(size = 20),
        plot.title = element_text(size = 30))

## Plot Placebo functional alpha diversity grouped by week

alpha_results_placebo <- estimate_richness(ps_raw_placebo, measures = c("Shannon", "Observed"))

plot_richness(ps_raw_placebo, color = "week", x = "week", measures = c("Shannon")) + 
  geom_boxplot(aes(fill = week), alpha=0.2) +
  stat_compare_means(method = "t.test", comparisons = c_my_comparisons, label = "p.signif", symnum.args = symnum.args, size=8, paired=TRUE) +
  labs(title="Shannon Index of Placebo subjects grouped by Week") + 
  geom_line(aes(x = week, y = alpha_results_placebo$Shannon, group = id), color='black', alpha=0.1) +
  theme(axis.text.x = element_text(size = 20),
        axis.text.y = element_text(size = 20),
        axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20),
        legend.title = element_text(size = 20), 
        legend.text = element_text(size = 15),
        strip.text = element_text(size = 20),
        plot.title = element_text(size = 30))

plot_richness(ps_raw_placebo, color = "week", x = "week", measures = c("Observed")) + 
  geom_boxplot(aes(fill = week), alpha=0.2) +
  stat_compare_means(method = "t.test", comparisons = c_my_comparisons, label = "p.signif", symnum.args = symnum.args, size=8, paired=TRUE, size=8) +
  labs(title="Observed Alpha Diversity of Placebo subjects grouped by Week") + 
  geom_line(aes(x = week, y = alpha_results_placebo$Observed, group = id), color='black', alpha=0.1) +
  theme(axis.text.x = element_text(size = 20),
        axis.text.y = element_text(size = 20),
        axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20),
        legend.title = element_text(size = 20), 
        legend.text = element_text(size = 15),
        strip.text = element_text(size = 20),
        plot.title = element_text(size = 30))

# Beta Diversity

Beta-Diversity provides a measure of dissimilarity between sample types. The functional differences between samples can be quantified with several methods. For this functional analysis, the Bray-Curtis distance estimator was chosen. This method compares the abundances of different fuction between samples and calculates the dissimilarity based on the proportion of differences.

7.1 Principal coordinate analysis (PCoA) of Bray-Curtis distances with DESeq2 transformation

Between-samples differences are visualized with a principal coordinates analysis.

set.seed(0503)

# generating and visualizing the PCoA with phyloseq
vst_pcoa <- ordinate(vst_physeq, method="PCoA", distance="bray")
eigen_vals <- vst_pcoa$values$Eigenvalues # allows us to scale the axes according to their magnitude of separating apart the samples

plot_ordination(vst_physeq, vst_pcoa, color="Group") + 
  theme(aspect.ratio=1) + 
  labs(title = "PCoA (Bray-Curtis index)", subtitle = "DESeq2 Transformation") +
  stat_ellipse(aes(group = Group), linetype = 2) +
  geom_point(size = 3) +
  theme(axis.text.x = element_text(size = 15),
        axis.text.y = element_text(size = 15),
        axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20),
        legend.title = element_text(size = 20), 
        legend.text = element_text(size = 15),
        plot.title = element_text(size = 25),
        plot.subtitle = element_text(size = 20))

plot_ordination(vst_physeq, vst_pcoa, color="group_week") + 
  theme(aspect.ratio=1) + 
  labs(title = "PCoA (Bray-Curtis index)", subtitle = "DESeq2 Transformation") +
  stat_ellipse(aes(group = group_week), linetype = 2) +
  geom_point(size = 3) +
  theme(axis.text.x = element_text(size = 15),
        axis.text.y = element_text(size = 15),
        axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20),
        legend.title = element_text(size = 20), 
        legend.text = element_text(size = 15),
        plot.title = element_text(size = 25), 
        plot.subtitle = element_text(size = 20))

7.2 Betadisper and permutational ANOVA

Betadisper and permutational ANOVA

Here we are going to test if there is a statistically signficant difference between our sample types. One way to do this is with the betadisper and adonis functions from the vegan package. adonis can tell us if there is a statistical difference between groups. However, adonis assumes that there is no statistical difference within the same group. To check whether this assumption is met, we first need to perfom a betadisper permutest. If the p-value is non-significant (p-value > 0.05), there is a sufficient level of homogeneity of dispersion within groups. If there is not, then the adonis permanova test can be unreliable.

7.2.0.1 Group

set.seed(0503)

# Betadisper Permutest 
bray_dist = phyloseq::distance(vst_physeq, method="bray", weighted=T)

disp.age = betadisper(bray_dist, sample_data(vst_physeq)$Group)

disp.age$call
## betadisper(d = bray_dist, group = sample_data(vst_physeq)$Group)
permutest(disp.age, pairwise=TRUE, permutations=1000)
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 1000
## 
## Response: Distances
##           Df   Sum Sq   Mean Sq      F N.Perm  Pr(>F)  
## Groups     2 0.024931 0.0124656 5.3798   1000 0.01199 *
## Residuals 42 0.097318 0.0023171                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Pairwise comparisons:
## (Observed p-value below diagonal, permuted p-value above diagonal)
##              Donor        FMT Placebo
## Donor              0.00199800  0.1129
## FMT     0.00015247             0.0929
## Placebo 0.12415345 0.09974887

The p-value of the betadisper permutest is lower than 0.05 so we can’t assume that sample groups are sufficiently homogenous and we can’t trust the adonis permanova test.

set.seed(0503)

# Adonis PERMANOVA analysis
adonis2(bray_dist ~ sample_data(vst_physeq)$Group)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = bray_dist ~ sample_data(vst_physeq)$Group)
##                               Df SumOfSqs      R2      F Pr(>F)  
## sample_data(vst_physeq)$Group  2   1.0872 0.05519 1.2268  0.026 *
## Residual                      42  18.6112 0.94481                
## Total                         44  19.6984 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The p-value of the Adonis permanova test is lower than 0.05 but we can’t trust this value considering that homegeneity within same groups was not proven.

7.2.0.2 group_week

set.seed(0503)

# Betadisper Permutest

bray_dist = phyloseq::distance(vst_physeq, method="bray", weighted=T)

disp.age = betadisper(bray_dist, sample_data(vst_physeq)$group_week)

disp.age$call
## betadisper(d = bray_dist, group = sample_data(vst_physeq)$group_week)
permutest(disp.age, pairwise=TRUE, permutations=1000)
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 1000
## 
## Response: Distances
##           Df   Sum Sq   Mean Sq      F N.Perm  Pr(>F)  
## Groups     4 0.019846 0.0049616 2.2324   1000 0.06893 .
## Residuals 40 0.088903 0.0022226                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Pairwise comparisons:
## (Observed p-value below diagonal, permuted p-value above diagonal)
##                Donor_Week_0 FMT_Week_0 FMT_Week_7 Placebo_Week_0 Placebo_Week_7
## Donor_Week_0                 0.0039960  0.0039960      0.1398601         0.2278
## FMT_Week_0        0.0050222             0.6993007      0.2237762         0.2078
## FMT_Week_7        0.0030283  0.7207732                 0.2827173         0.2647
## Placebo_Week_0    0.1479421  0.2150136  0.2867407                        0.9231
## Placebo_Week_7    0.2464497  0.2151499  0.2812852      0.9222311

The p-value of the betadisper permutest is higher than 0.05 so we can assume that sample groups are sufficiently homogenous and we can carry on with the adonis permanova test.

# Adonis PERMANOVA pairwise analysis

ps_norm_df <- as(sample_data(vst_physeq), "data.frame")

pairwise.adonis(bray_dist,ps_norm_df$group_week)
##                               pairs Df SumsOfSqs   F.Model         R2 p.value
## 1        FMT_Week_0 vs Donor_Week_0  1 0.5154288 1.1240585 0.07432254   0.173
## 2          FMT_Week_0 vs FMT_Week_7  1 0.4156883 0.9219378 0.03699302   0.663
## 3      FMT_Week_0 vs Placebo_Week_0  1 0.4063794 0.9030803 0.04537390   0.784
## 4      FMT_Week_0 vs Placebo_Week_7  1 0.4079844 0.9090391 0.04565962   0.755
## 5        Donor_Week_0 vs FMT_Week_7  1 0.4751350 1.0514915 0.06985962   0.268
## 6    Donor_Week_0 vs Placebo_Week_0  1 0.5251980 1.1657379 0.11467322   0.131
## 7    Donor_Week_0 vs Placebo_Week_7  1 0.5284258 1.1794470 0.11586553   0.107
## 8      FMT_Week_7 vs Placebo_Week_0  1 0.4363533 0.9804058 0.04906836   0.496
## 9      FMT_Week_7 vs Placebo_Week_7  1 0.4930383 1.1107218 0.05523033   0.181
## 10 Placebo_Week_0 vs Placebo_Week_7  1 0.2494544 0.5667102 0.03890447   0.990
##    p.adjusted sig
## 1           1    
## 2           1    
## 3           1    
## 4           1    
## 5           1    
## 6           1    
## 7           1    
## 8           1    
## 9           1    
## 10          1

None of the p-values from the pairwise Adonis permanova test resulted higher than 0.05 so we can’t prove that there is a statistically significant difference between any of the groups.

8 Differential Abundance Analysis

8.1 DESeq2 Differential Analysis with Microbial

Find features that are up or down represented in the compared groups using the microbial package that implements DESeq2. - The difftest function from the microbial package finds features that are up or down represented in the compared groups. The difftest function requires a phyloseq object containing merged information of abundance, taxonomic assignment and sample data including the measured variables and categorical information of the samples. Since the DESeq2 transformation is performed in this function, raw count values are preferred. In this context, we will be using the ps_raw phyloseq object and its subsets.
- The plotdiff function produces a figure with the top most annotated features with corresponding adjusted p-values and abundance distribution with respect to control conditions.

The group parameter is a character string specifying the name of a categorical variable containing grouping information. The pvalue and log2FC are thresholds for p values and log2 fold change. Adjusted p value cutoff is also supported by specify the padj paramater.

8.1.1 According to Week Condition between the FMT subjects (Week 7 vs Week 0)

res1 <- difftest(ps_raw_FMT, group= "week", ref = "Week_0")
res1$Phylum <- res1$"tax[rownames(res), ]" 
res1$KO <- res1$Phylum
write.table(res1,"results_deseq2_Week7vsWeek0_FMT.tsv", sep = "\t", quote = F, row.names = FALSE) 
# KO
plotdiff(res1, level="Phylum", pvalue=0.05, log2FC = 4) + geom_hline(yintercept = 0, linetype="dotted")+ geom_point(size = 4) +
  labs(y = "\nLog2 Fold-Change for Week_7 vs Week_0 in FMT subjects \n pvalue >= 0.05, log2FC >= 4", x = "") +
  theme(axis.text.x = element_text(color = "black", size = 10),
        axis.text.y = element_text(color = "black", size = 10),
        axis.title.y = element_text(size = 12),
        axis.title.x = element_text(size = 12),
        legend.position =  "none") +
  geom_hline(yintercept = 0, linetype="dotted")

8.1.2 According to Week Condition between the Placebo subjects (Week 7 vs Week 0)

res2 <- difftest(ps_raw_placebo, group= "week", ref = "Week_0")
res2$Phylum <- res2$"tax[rownames(res), ]" 
res2$KO <- res2$Phylum
write.table(res2,"results_deseq2_Week7vsWeek0_placebo.tsv", sep = "\t", quote = F, row.names = FALSE) 
# KO
plotdiff(res2, level="Phylum", pvalue=0.05, log2FC = 4) + geom_hline(yintercept = 0, linetype="dotted")+ geom_point(size = 4) +
  labs(y = "\nLog2 Fold-Change for Week_7 vs Week_0 in Placebo subjects \n pvalue >= 0.05 , log2FC >= 4", x = "") +
  theme(axis.text.x = element_text(color = "black", size = 10),
        axis.text.y = element_text(color = "black", size = 10),
        axis.title.y = element_text(size = 12),
        axis.title.x = element_text(size = 12),
        legend.position =  "none") +
  geom_hline(yintercept = 0, linetype="dotted")

8.1.3 According to Group Condition between samples collected at Week 0 (FMT vs Placebo)

res3 <- difftest(ps_raw_week0, group= "Group", ref = "Placebo")
res3$Phylum <- res3$"tax[rownames(res), ]" 
res3$KO <- res3$Phylum

write.table(res3,"results_deseq2_FMTvsPlacebo_week0.tsv", sep = "\t", quote = F, row.names = FALSE) 
# KO
plotdiff(res3, level="Phylum", pvalue=0.05, log2FC = 4) + geom_hline(yintercept = 0, linetype="dotted")+ geom_point(size = 4) +
  labs(y = "\nLog2 Fold-Change for FMT vs Placebo subjects in samples collected at Week 0 \n pvalue = 0.05 , log2FC >= 4", x = "") +
  theme(axis.text.x = element_text(color = "black", size = 12),
        axis.text.y = element_text(color = "black", size = 12),
        axis.title.y = element_text(size = 14),
        axis.title.x = element_text(size = 14),
        legend.position =  "none") +
  geom_hline(yintercept = 0, linetype="dotted")

8.1.4 According to Group Condition between samples collected at Week 7 (FMT vs Placebo)

res4 <- difftest(ps_raw_week7, group= "Group", ref = "Placebo")
res4$Phylum <- res4$"tax[rownames(res), ]" 
res4$KO <- res4$Phylum

write.table(res4,"results_deseq2_FMTvsPlacebo_week7.tsv", sep = "\t", quote = F, row.names = FALSE) 
# KO
plotdiff(res4, level="Phylum", pvalue=0.05, log2FC = 4) + geom_hline(yintercept = 0, linetype="dotted")+ geom_point(size = 4) +
  labs(y = "\nLog2 Fold-Change for FMT vs Placebo subjects in samples collected at Week 7 \n pvalue = 0.05 , log2FC >= 4", x = "") +
  theme(axis.text.x = element_text(color = "black", size = 12),
        axis.text.y = element_text(color = "black", size = 12),
        axis.title.y = element_text(size = 14),
        axis.title.x = element_text(size = 14),
        legend.position =  "none") +
  geom_hline(yintercept = 0, linetype="dotted")

8.1.5 Venn diagrams

Venn diagrams to perform further comparisons are designed.

## FMT
#results_deseq2_Week7vsWeek0_FMT_des <- read.delim("~/Desktop/RyC_TFM/final_results/results_deseq2_Week7vsWeek0_FMT_des.tsv", row.names=1)
results_deseq2_Week7vsWeek0_FMT_des <- read.delim("results_deseq2_Week7vsWeek0_FMT.tsv")
results_deseq2_Week7vsWeek0_FMT_des_SIGNI <- results_deseq2_Week7vsWeek0_FMT_des %>% filter (results_deseq2_Week7vsWeek0_FMT_des$pvalue <= 0.05)


## PLACEBO
results_deseq2_Week7vsWeek0_Placebo_des <- read.delim("results_deseq2_Week7vsWeek0_placebo.tsv")
results_deseq2_Week7vsWeek0_Placebo_des_SIGNI <- results_deseq2_Week7vsWeek0_Placebo_des %>% filter (results_deseq2_Week7vsWeek0_Placebo_des$pvalue <= 0.05)

## To create both lists
A = results_deseq2_Week7vsWeek0_FMT_des_SIGNI %>% dplyr::select(KO) %>% unlist()
B = results_deseq2_Week7vsWeek0_Placebo_des_SIGNI %>% dplyr::select(KO) %>% unlist()
## Venn Diagrams

vennPlot <- venn.diagram(list( A = A, B = B),fill = c("forestgreen", "darkorchid4"),  na = "remove", category.names = c("Week 7 vs Week 0 in FMT subjects", "Week 7 vs Week 0 in Placebo subjects"), alpha = c(0.4, 0.4), cex = 1, cat.fontface = 4,lty =2, fontfamily =3, filename = NULL, cat.dist = c(0.03, 0.03),
    cat.pos = c(1, 4 )) ; grid.newpage(); grid.draw(vennPlot) 

## W0
results_deseq2_FMTvsPlacebo_week0_des <- read.delim("results_deseq2_FMTvsPlacebo_week0.tsv")
results_deseq2_FMTvsPlacebo_week0_des_SIGNI <- results_deseq2_FMTvsPlacebo_week0_des %>% filter(results_deseq2_FMTvsPlacebo_week0_des$pvalue <= 0.05)


## W7
results_deseq2_FMTvsPlacebo_week7_des <- read.delim("results_deseq2_FMTvsPlacebo_week7.tsv")
results_deseq2_FMTvsPlacebo_week7_des_SIGNI <- results_deseq2_FMTvsPlacebo_week7_des %>% filter (results_deseq2_FMTvsPlacebo_week7_des$pvalue <= 0.05)

## To create both lists
A = results_deseq2_FMTvsPlacebo_week0_des_SIGNI %>% dplyr::select(KO) %>% unlist()
B = results_deseq2_FMTvsPlacebo_week7_des_SIGNI %>% dplyr::select(KO) %>% unlist()  
## Venn Diagrams


vennPlot <- venn.diagram(list( A = A, B = B),fill = c("brown2", "darkgoldenrod2"),  na = "remove", category.names = c("FMT vs Placebo in Week 0" , "FMT vs Placebo in Week 7"), alpha = c(0.4, 0.4), cex = 1, cat.fontface = 4,lty =2, fontfamily =3, filename = NULL, cat.dist = c(0.02, 0.02), cat.pos = c(-15, -3)) ; grid.newpage(); grid.draw(vennPlot)